# Install RSDA only when it is not already available, so that re-running the
# script does not reinstall the package every time. On a fresh machine this
# performs the same two installs as before (CRAN release, then the development
# version from GitHub, which requires devtools).
if (!requireNamespace("RSDA", quietly = TRUE)) {
  install.packages("RSDA", dependencies = TRUE)
  devtools::install_github("PROMiDAT/RSDA")
}

# Attach RSDA: the rest of the script calls its functions unqualified
# (read.sym.table(), classic.to.sym(), ...).
library(RSDA)
# Read the symbolic table stored in tsym1.csv (';' as field separator, '.' as
# decimal mark, row names taken from column 1) and display it.
ex3 <- read.sym.table(
  file = "tsym1.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1
)
ex3
#> # A tibble: 7 × 7
#> F1 F2 F3 F4 F5 F6 F7
#> <dbl> <symblc_n> <symbl> <dbl> <symblc_> <symblc_n> <symblc_n>
#> 1 2.8 [1.00 : 2.00] <hist> 6 {a,d} [0.00 : 90.00] [9.00 : 24.00]
#> 2 1.4 [3.00 : 9.00] <hist> 8 {b,c,d} [-90.00 : 98.00] [-9.00 : 9.00]
#> 3 3.2 [-1.00 : 4.00] <hist> -7 {a,b} [65.00 : 90.00] [65.00 : 70.00]
#> 4 -2.1 [0.00 : 2.00] <hist> 0 {a,b,c,d} [45.00 : 89.00] [25.00 : 67.00]
#> 5 -3 [-4.00 : -2.00] <hist> -9.5 {b} [20.00 : 40.00] [9.00 : 40.00]
#> 6 0.1 [10.00 : 21.00] <hist> -1 {a,d} [5.00 : 8.00] [5.00 : 8.00]
#> 7 9 [4.00 : 21.00] <hist> 0.5 {a} [3.14 : 6.76] [4.00 : 6.00]
## How to save a symbolic table in a CSV file with RSDA?
write.sym.table(
  ex3,
  file = "tsymtemp.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
# Load the bundled example3 symbolic table and display it.
data("example3")
example3
#> # A tibble: 7 × 7
#> F1 F2 F3 F4 F5 F6
#> <dbl> <symblc_n> <symblc_m> <dbl> <symblc_> <symblc_n>
#> 1 2.8 [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20 6 {e,g,i,k} [0.00 : 90.00]
#> 2 1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10 8 {a,b,c,d} [-90.00 : 98.00]
#> 3 3.2 [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60 -7 {2,b,1,c} [65.00 : 90.00]
#> 4 -2.1 [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10 0 {a,3,4,c} [45.00 : 89.00]
#> 5 -3 [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40 -9.5 {e,g,i,k} [20.00 : 40.00]
#> 6 0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30 -1 {e,1,i} [5.00 : 8.00]
#> 7 9 [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60 0.5 {e,a,2} [3.14 : 6.76]
#> # ℹ 1 more variable: F7 <symblc_n>

# Select a single row (all columns).
example3[2, ]
#> # A tibble: 1 × 7
#> F1 F2 F3 F4 F5 F6
#> <dbl> <symblc_n> <symblc_m> <dbl> <symblc_s> <symblc_n>
#> 1 1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10 8 {a,b,c,d} [-90.00 : 98.00]
#> # ℹ 1 more variable: F7 <symblc_n>

# Select a single column (all rows); the result is still a tibble.
example3[, 3]
#> # A tibble: 7 × 1
#> F3
#> <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60

# Select a range of rows from one column.
example3[2:3, 5]
#> # A tibble: 2 × 1
#> F5
#> <symblc_s>
#> 1 {a,b,c,d}
#> 2 {2,b,1,c}

# `$` extracts the column itself rather than a one-column tibble.
example3$F1
#> [1] 2.8 1.4 3.2 -2.1 -3.0 0.1 9.0
# Load the ex1_db2so data set and print it in full.
data("ex1_db2so")
ex1_db2so
#> state sex county group age
#> 1 Florida M 2 6 3
#> 2 California F 4 3 4
#> 3 Texas M 12 3 4
#> 4 Florida F 2 3 4
#> 5 Texas M 4 6 4
#> 6 Texas F 2 3 3
#> 7 Florida M 6 3 4
#> 8 Florida F 2 6 4
#> 9 California M 2 3 6
#> 10 California F 21 3 4
#> 11 California M 2 3 4
#> 12 California M 2 6 7
#> 13 Texas F 23 3 4
#> 14 Florida M 2 3 4
#> 15 Florida F 12 7 4
#> 16 Texas M 2 3 8
#> 17 California F 3 7 9
#> 18 California M 2 3 11
#> 19 California M 1 3 11
# The classic.to.sym function converts a traditional table into a symbolic
# one. To do this we must indicate the following parameters:
#   x                   = a data.frame
#   concept             = variables to be used as a concept
#   variables           = variables to be used, compatible with tidyselect
#                         options
#   default.numeric     = function that will be used by default for numerical
#                         values (sym.interval)
#   default.categorical = function that will be used by default for
#                         categorical values (sym.model)
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c(state, sex),
  variables = c(county, group, age)
)
result
#> # A tibble: 6 × 3
#> county group age
#> <symblc_n> <symblc_n> <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00] [4.00 : 9.00]
#> 2 [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00] [4.00 : 4.00]
#> 4 [2.00 : 6.00] [3.00 : 6.00] [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00] [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00] [4.00 : 8.00]
# We can add new variables indicating the type we want them to be.
# Here an extra column, age_hist, is requested as a symbolic histogram of age;
# its breaks are computed with pretty() over the full age column.
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c("state", "sex"),
  variables = c(county, group, age),
  age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5))
)
result
#> # A tibble: 6 × 4
#> age_hist county group age
#> <symblc_h> <symblc_n> <symblc_n> <symblc_n>
#> 1 <hist> [3.00 : 21.00] [3.00 : 7.00] [4.00 : 9.00]
#> 2 <hist> [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 <hist> [2.00 : 12.00] [3.00 : 7.00] [4.00 : 4.00]
#> 4 <hist> [2.00 : 6.00] [3.00 : 6.00] [3.00 : 4.00]
#> 5 <hist> [2.00 : 23.00] [3.00 : 3.00] [3.00 : 4.00]
#> 6 <hist> [2.00 : 12.00] [3.00 : 6.00] [4.00 : 8.00]
# Load the USCrime data set and preview its first rows.
data("USCrime")
head(USCrime)
#> state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1 8 1 0.19 0.33 0.02 0.90 0.12
#> 2 53 1 0.00 0.16 0.12 0.74 0.45
#> 3 24 1 0.00 0.42 0.49 0.56 0.17
#> 4 34 1 0.04 0.77 1.00 0.08 0.12
#> 5 42 1 0.01 0.55 0.02 0.95 0.09
#> 6 6 1 0.02 0.28 0.06 0.54 1.00
#> racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1 0.17 0.34 0.47 0.29 0.32 0.20 1.0
#> 2 0.07 0.26 0.59 0.35 0.27 0.02 1.0
#> 3 0.04 0.39 0.47 0.28 0.32 0.00 0.0
#> 4 0.10 0.51 0.50 0.34 0.21 0.06 1.0
#> 5 0.05 0.38 0.38 0.23 0.36 0.02 0.9
#> 6 0.25 0.31 0.48 0.27 0.37 0.04 1.0
#> medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1 0.37 0.72 0.34 0.60 0.29 0.15 0.43
#> 2 0.31 0.72 0.11 0.45 0.25 0.29 0.39
#> 3 0.30 0.58 0.19 0.39 0.38 0.40 0.84
#> 4 0.58 0.89 0.21 0.43 0.36 0.20 0.82
#> 5 0.50 0.72 0.16 0.68 0.44 0.11 0.71
#> 6 0.52 0.68 0.20 0.61 0.28 0.15 0.25
#> medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1 0.39 0.40 0.39 0.32 0.27 0.27
#> 2 0.29 0.37 0.38 0.33 0.16 0.30
#> 3 0.28 0.27 0.29 0.27 0.07 0.29
#> 4 0.51 0.36 0.40 0.39 0.16 0.25
#> 5 0.46 0.43 0.41 0.28 0.00 0.74
#> 6 0.62 0.72 0.76 0.77 0.28 0.52
#> OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1 0.36 0.41 0.08 0.19 0.10
#> 2 0.22 0.35 0.01 0.24 0.14
#> 3 0.28 0.39 0.01 0.27 0.27
#> 4 0.36 0.44 0.01 0.10 0.09
#> 5 0.51 0.48 0.00 0.06 0.25
#> 6 0.48 0.60 0.01 0.12 0.13
#> PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1 0.18 0.48 0.27 0.68 0.23 0.41
#> 2 0.24 0.30 0.27 0.73 0.57 0.15
#> 3 0.43 0.19 0.36 0.58 0.32 0.29
#> 4 0.25 0.31 0.33 0.71 0.36 0.45
#> 5 0.30 0.33 0.12 0.65 0.67 0.38
#> 6 0.12 0.80 0.10 0.65 0.19 0.77
#> PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1 0.25 0.52 0.68 0.40 0.75
#> 2 0.42 0.36 1.00 0.63 0.91
#> 3 0.49 0.32 0.63 0.41 0.71
#> 4 0.37 0.39 0.34 0.45 0.49
#> 5 0.42 0.46 0.22 0.27 0.20
#> 6 0.06 0.91 0.49 0.57 0.61
#> TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1 0.75 0.35 0.55 0.59 0.61 0.56
#> 2 1.00 0.29 0.43 0.47 0.60 0.39
#> 3 0.70 0.45 0.42 0.44 0.43 0.43
#> 4 0.44 0.75 0.65 0.54 0.83 0.65
#> 5 0.21 0.51 0.91 0.91 0.89 0.85
#> 6 0.58 0.44 0.62 0.69 0.87 0.53
#> PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1 0.74 0.76 0.04 0.14 0.03 0.24
#> 2 0.46 0.53 0.00 0.24 0.01 0.52
#> 3 0.71 0.67 0.01 0.46 0.00 0.07
#> 4 0.85 0.86 0.03 0.33 0.02 0.11
#> 5 0.40 0.60 0.00 0.06 0.00 0.03
#> 6 0.30 0.43 0.00 0.11 0.04 0.30
#> PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1 0.27 0.37 0.39 0.07 0.07
#> 2 0.62 0.64 0.63 0.25 0.27
#> 3 0.06 0.15 0.19 0.02 0.02
#> 4 0.20 0.30 0.31 0.05 0.08
#> 5 0.07 0.20 0.27 0.01 0.02
#> 6 0.35 0.43 0.47 0.50 0.50
#> PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1 0.08 0.08 0.89 0.06
#> 2 0.25 0.23 0.84 0.10
#> 3 0.04 0.05 0.88 0.04
#> 4 0.11 0.11 0.81 0.08
#> 5 0.04 0.05 0.88 0.05
#> 6 0.56 0.57 0.45 0.28
#> PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1 0.14 0.13 0.33 0.39
#> 2 0.16 0.10 0.17 0.29
#> 3 0.20 0.20 0.46 0.52
#> 4 0.56 0.62 0.85 0.77
#> 5 0.16 0.19 0.59 0.60
#> 6 0.25 0.19 0.29 0.53
#> PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1 0.28 0.55 0.09 0.51 0.5
#> 2 0.17 0.26 0.20 0.82 0.0
#> 3 0.43 0.42 0.15 0.51 0.5
#> 4 1.00 0.94 0.12 0.01 0.5
#> 5 0.37 0.89 0.02 0.19 0.5
#> 6 0.18 0.39 0.26 0.73 0.0
#> HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1 0.21 0.71 0.52 0.05 0.26
#> 2 0.02 0.79 0.24 0.02 0.25
#> 3 0.01 0.86 0.41 0.29 0.30
#> 4 0.01 0.97 0.96 0.60 0.47
#> 5 0.01 0.89 0.87 0.04 0.55
#> 6 0.02 0.84 0.30 0.16 0.28
#> MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1 0.65 0.14 0.06 0.22 0.19
#> 2 0.65 0.16 0.00 0.21 0.20
#> 3 0.52 0.47 0.45 0.18 0.17
#> 4 0.52 0.11 0.11 0.24 0.21
#> 5 0.73 0.05 0.14 0.31 0.31
#> 6 0.25 0.02 0.05 0.94 1.00
#> OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1 0.18 0.36 0.35 0.38 0.34 0.38
#> 2 0.21 0.42 0.38 0.40 0.37 0.29
#> 3 0.16 0.27 0.29 0.27 0.31 0.48
#> 4 0.19 0.75 0.70 0.77 0.89 0.63
#> 5 0.30 0.40 0.36 0.38 0.38 0.22
#> 6 1.00 0.67 0.63 0.68 0.62 0.47
#> MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1 0.46 0.25 0.04 0 0.12
#> 2 0.32 0.18 0.00 0 0.21
#> 3 0.39 0.28 0.00 0 0.14
#> 4 0.51 0.47 0.00 0 0.19
#> 5 0.51 0.21 0.00 0 0.11
#> 6 0.59 0.11 0.00 0 0.70
#> PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1 0.42 0.50 0.51 0.64 0.12 0.26
#> 2 0.50 0.34 0.60 0.52 0.02 0.12
#> 3 0.49 0.54 0.67 0.56 0.01 0.21
#> 4 0.30 0.73 0.64 0.65 0.02 0.39
#> 5 0.72 0.64 0.61 0.53 0.04 0.09
#> 6 0.42 0.49 0.73 0.64 0.01 0.58
#> PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1 0.20 0.32 0.20
#> 2 0.45 0.00 0.67
#> 3 0.02 0.00 0.43
#> 4 0.28 0.00 0.12
#> 5 0.02 0.00 0.03
#> 6 0.10 0.00 0.14
# Build a symbolic table from USCrime using state as the concept. Three
# variables are kept, and ViolentCrimesPerPop is additionally summarised as a
# symbolic histogram whose breaks come from pretty() over the whole column.
result <- classic.to.sym(
  x = USCrime,
  concept = state,
  variables = c(NumInShelters, NumImmig, ViolentCrimesPerPop),
  ViolentCrimesPerPop_hist = sym.histogram(
    ViolentCrimesPerPop,
    breaks = pretty(USCrime$ViolentCrimesPerPop, 5)
  )
)
result
#> # A tibble: 46 × 4
#> ViolentCrimesPerPop_hist NumInShelters NumImmig ViolentCrimesPerPop
#> <symblc_h> <symblc_n> <symblc_n> <symblc_n>
#> 1 <hist> [0.00 : 0.32] [0.00 : 0.04] [0.01 : 1.00]
#> 2 <hist> [0.01 : 0.18] [0.01 : 0.09] [0.05 : 0.36]
#> 3 <hist> [0.00 : 1.00] [0.00 : 0.57] [0.05 : 0.57]
#> 4 <hist> [0.00 : 0.08] [0.00 : 0.02] [0.02 : 1.00]
#> 5 <hist> [0.00 : 1.00] [0.00 : 1.00] [0.01 : 1.00]
#> 6 <hist> [0.00 : 0.68] [0.00 : 0.23] [0.07 : 0.75]
#> 7 <hist> [0.00 : 0.79] [0.00 : 0.14] [0.00 : 0.94]
#> 8 <hist> [0.01 : 0.01] [0.01 : 0.01] [0.37 : 0.37]
#> 9 <hist> [1.00 : 1.00] [0.39 : 0.39] [1.00 : 1.00]
#> 10 <hist> [0.00 : 0.52] [0.00 : 1.00] [0.06 : 1.00]
#> # ℹ 36 more rows
# Load the ex_mcfa1 data set and preview its first rows.
data(ex_mcfa1)
head(ex_mcfa1)
#> suspect age hair eyes region
#> 1 1 42 h_red e_brown Bronx
#> 2 2 20 h_black e_green Bronx
#> 3 3 64 h_brown e_brown Brooklyn
#> 4 4 55 h_blonde e_brown Bronx
#> 5 5 4 h_brown e_green Manhattan
#> 6 6 61 h_blonde e_green Bronx
# Build a symbolic table with suspect as the concept, keeping only hair, eyes
# and region; sym.set is passed as the default for categorical variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  variables = c(hair, eyes, region),
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 3
#> hair eyes region
#> <symblc_s> <symblc_s> <symblc_s>
#> 1 {h_red} {e_brown,e_black} {Bronx}
#> 2 {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # ℹ 90 more rows
# We can modify the function that will be applied by default to the
# categorical variables. No `variables` argument is given in this call.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#> age hair eyes region
#> <symblc_n> <symblc_s> <symblc_s> <symblc_s>
#> 1 [22.00 : 42.00] {h_red} {e_brown,e_black} {Bronx}
#> 2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 [29.00 : 64.00] {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 [14.00 : 55.00] {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 [4.00 : 47.00] {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 [32.00 : 61.00] {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 [49.00 : 61.00] {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 [50.00 : 68.00] {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # ℹ 90 more rows
# Convert the SDS file hani3101.sds into an RSDA symbolic table; the converter
# reports each variable as it is processed.
hani3101 <- SDS.to.RSDA(
  file.path = "hani3101.sds"
)
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#> R3101 RNINO12
#> <symblc_m> <symblc_m>
#> 1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#> 2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#> 3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#> 4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#> 5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#> 6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#> 7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#> 8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # ℹ 22 more rows
#> # ℹ 4 more variables: RNINO3 <symblc_m>, RNINO4 <symblc_m>, RNINO34 <symblc_m>,
#> # RSOI <symblc_m>
# The imported table can then be saved as a CSV file in RSDA format:
write.sym.table(hani3101, file = "hani3101.csv", sep = ";", dec = ".",
                row.names = TRUE, col.names = TRUE)
# Convert the SODAS XML file abalone.xml into an RSDA symbolic table; the
# converter reports each variable as it is processed.
abalone <- SODAS.to.RSDA(
  "abalone.xml"
)
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#> LENGTH DIAMETER HEIGHT WHOLE_WEIGHT SHUCKED_WEIGHT
#> <symblc_n> <symblc_n> <symblc_n> <symblc_n> <symblc_n>
#> 1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37] [0.03 : 0.64]
#> 2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25] [0.06 : 1.16]
#> 3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66] [0.07 : 1.49]
#> 4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51] [0.11 : 1.23]
#> 5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20] [0.12 : 0.84]
#> 6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53] [0.16 : 0.93]
#> 7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12] [0.16 : 0.82]
#> 8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81] [0.32 : 0.71]
#> 9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07] [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89] [0.00 : 0.50]
#> # ℹ 14 more rows
#> # ℹ 2 more variables: VISCERA_WEIGHT <symblc_n>, SHELL_WEIGHT <symblc_n>
# Save the converted abalone table as a CSV file (';' separator, '.' decimal).
write.sym.table(abalone, file = "abalone.csv", sep = ";", dec = ".",
                row.names = TRUE, col.names = TRUE)
# Descriptive statistics on example3 ----
# Each statistic is shown both with `$` extraction and with `[, j]` indexing.
data("example3")

# Mean ----
mean(example3$F1)
#> [1] 1.628571
mean(example3[, 1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[, 2])
#> [1] 5
# With method = "interval" the result is itself a symbolic interval.
mean(example3$F2, method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[, 2], method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

# Median ----
median(example3$F1)
#> [1] 1.4
median(example3[, 1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[, 2])
#> [1] 1.5
median(example3$F6, method = "interval")
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[, 6], method = "interval")
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

# Variance ----
var(example3[, 1])
#> [1] 15.98238
var(example3[, 2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = "interval")
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = "billard")
#> [1] 1355.143

# Standard deviation ----
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = "interval")
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = "billard")
#> [1] 36.81226

# Correlation ----
cor(example3$F1, example3$F4)
#> [1] 0.2864553
# Indexing with `[, j]` yields a 1 x 1 matrix instead of a bare number.
cor(example3[, 1], example3[, 4])
#> [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = "centers")
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = "billard")
#> [1] -0.6020041
# Attach ggpolypath (which in turn loads ggplot2, as the message below shows).
library("ggpolypath")
#> Loading required package: ggplot2

# Load oils and pass it through RSDA's non-exported to.v2()/to.v3() converters
# (hence `:::`) before plotting.
data("oils")
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))

# Radar plot for rows 2 and 3 of oils.
sym.radar.plot(oils[2:3, ])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
#> font family not found in Windows font database
# Radar plot for rows 2 to 5 of oils.
sym.radar.plot(oils[2:5, ])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#> a single row.
#> Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
#> font family not found in Windows font database
# Histogram of the second oils variable with 4 bins, one colour per bin. The
# returned list (frequencies and histogram positions) is kept in `res`.
res <- interval.histogram.plot(oils[, 2], n.bins = 4, col = c(2, 3, 4, 5))
res
#> $frequency
#> [1] 25 49 1 25
#>
#> $histogram
#> [,1]
#> [1,] 0.7
#> [2,] 1.9
#> [3,] 3.1
#> [4,] 4.3

# Histogram of the third oils variable with 3 bins and an explicit title.
res <- interval.histogram.plot(oils[, 3], n.bins = 3, main = "Histogram",
                               col = c(2, 3, 4))
res
#> $frequency
#> [1] 50 25 25
#>
#> $histogram
#> [,1]
#> [1,] 0.7
#> [2,] 1.9
#> [3,] 3.1
# Hierarchical clustering of oils from three interval distance matrices ----
data(oils)

# Gowda-Diday distance on the first four variables.
DM <- sym.dist.interval(sym.data = oils[, 1:4], method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

# Ichino distance on the same variables.
DM <- sym.dist.interval(sym.data = oils[, 1:4], method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

# Hausdorff distance on variables 1, 2 and 4, with span normalisation enabled
# and plain normalisation disabled.
DM <- sym.dist.interval(
  sym.data = oils[, c(1, 2, 4)],
  gamma = 0.5,
  method = "Hausdorff",
  normalize = FALSE,
  SpanNormalize = TRUE,
  euclidea = TRUE,
  q = 2
)
model <- hclust(DM)
plot(model, hang = -1)
# Symbolic linear regression with method "cm" ----
data("int_prost_train")
data("int_prost_test")

# Fit lpsa against all remaining variables on the training table.
res.cm <- sym.lm(
  formula = lpsa ~ .,
  sym.data = int_prost_train,
  method = "cm"
)
res.cm
#>
#> Call:
#> stats::lm(formula = formula, data = centers)
#>
#> Coefficients:
#> (Intercept) lcavol lweight age lbph svi
#> 0.411537 0.579327 0.614128 -0.018659 0.143918 0.730937
#> lcp gleason pgg45
#> -0.205536 -0.030924 0.009507

# Predict on the test table and score the fitted values against the observed
# lpsa intervals.
pred.cm <- sym.predict(
  model = res.cm,
  new.sym.data = int_prost_test
)
RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964
# Lasso regression (alpha = 1) with method "cm" ----
data("int_prost_train")
data("int_prost_test")

# Column 9 is the response; the fit is cross-validated with 10 folds.
res.cm.lasso <- sym.glm(sym.data = int_prost_train, response = 9,
                        method = "cm", alpha = 1, nfolds = 10, grouped = TRUE)
pred.cm.lasso <- sym.predict(res.cm.lasso, response = 9, int_prost_test,
                             method = "cm")

# Plot the fitted object, then its glmnet.fit component against lambda.
plot(res.cm.lasso)
plot(res.cm.lasso$glmnet.fit, "lambda", label = TRUE)

# Score the predictions against the observed lpsa intervals.
RMSE.L(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.7087091
RMSE.U(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.705288
R2.L(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.5207597
R2.U(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.5248207
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4943982
# Ridge regression (alpha = 0) with method "cm" ----
data("int_prost_train")
data("int_prost_test")

# Column 9 is the response; the fit is cross-validated with 10 folds.
res.cm.ridge <- sym.glm(sym.data = int_prost_train, response = 9,
                        method = "cm", alpha = 0, nfolds = 10, grouped = TRUE)
pred.cm.ridge <- sym.predict(res.cm.ridge, response = 9, int_prost_test,
                             method = "cm")

# Plot the fitted object, then its glmnet.fit component against lambda.
plot(res.cm.ridge)
plot(res.cm.ridge$glmnet.fit, "lambda", label = TRUE)

# Score the predictions against the observed lpsa intervals.
RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.703543
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7004145
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5286114
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5322683
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4808652
# Symbolic PCA on oils with several methods ----
# `res` is overwritten by each fit; "ind" plots individuals, "var" variables.
data(oils)

# Method: centers
res <- sym.pca(oils, "centers")
plot(res, choix = "ind")
plot(res, choix = "var")

# Method: tops (individuals plot only)
res <- sym.pca(oils, "tops")
plot(res, choix = "ind")

# Method: principal.curves (individuals plot only)
res <- sym.pca(oils, "principal.curves")
plot(res, choix = "ind")

# Method: optimized.distance
res <- sym.pca(oils, "optimized.distance")
plot(res, choix = "ind")
plot(res, choix = "var")

# Method: optimized.variance
res <- sym.pca(oils, "optimized.variance")
plot(res, choix = "ind")
plot(res, choix = "var")
# Load the ex_mcfa1 data set and print it in full.
data(ex_mcfa1)
ex_mcfa1
#> suspect age hair eyes region
#> 1 1 42 h_red e_brown Bronx
#> 2 2 20 h_black e_green Bronx
#> 3 3 64 h_brown e_brown Brooklyn
#> 4 4 55 h_blonde e_brown Bronx
#> 5 5 4 h_brown e_green Manhattan
#> 6 6 61 h_blonde e_green Bronx
#> 7 7 61 h_white e_black Queens
#> 8 8 32 h_blonde e_brown Manhattan
#> 9 9 39 h_blonde e_black Brooklyn
#> 10 10 50 h_brown e_brown Manhattan
#> 11 11 41 h_red e_blue Manhattan
#> 12 12 35 h_blonde e_green Brooklyn
#> 13 13 56 h_blonde e_brown Bronx
#> 14 14 52 h_red e_brown Queens
#> 15 15 55 h_red e_green Brooklyn
#> 16 16 25 h_brown e_brown Queens
#> 17 17 52 h_blonde e_brown Brooklyn
#> 18 18 28 h_red e_brown Manhattan
#> 19 19 21 h_white e_blue Manhattan
#> 20 20 66 h_black e_black Brooklyn
#> 21 21 67 h_blonde e_brown Queens
#> 22 22 13 h_white e_blue Brooklyn
#> 23 23 39 h_brown e_green Manhattan
#> 24 24 47 h_black e_green Brooklyn
#> 25 25 54 h_blonde e_brown Bronx
#> 26 26 75 h_brown e_blue Brooklyn
#> 27 27 3 h_white e_green Manhattan
#> 28 28 40 h_white e_green Manhattan
#> 29 29 58 h_red e_blue Queens
#> 30 30 41 h_brown e_green Bronx
#> 31 31 25 h_white e_black Brooklyn
#> 32 32 75 h_blonde e_blue Manhattan
#> 33 33 58 h_white e_brown Bronx
#> 34 34 61 h_white e_brown Manhattan
#> 35 35 52 h_white e_blue Bronx
#> 36 36 19 h_red e_black Queens
#> 37 37 58 h_red e_black Bronx
#> 38 38 46 h_black e_green Manhattan
#> 39 39 74 h_brown e_black Manhattan
#> 40 40 26 h_blonde e_brown Brooklyn
#> 41 41 63 h_blonde e_blue Queens
#> 42 42 40 h_brown e_black Queens
#> 43 43 65 h_black e_brown Brooklyn
#> 44 44 51 h_blonde e_brown Brooklyn
#> 45 45 15 h_white e_black Brooklyn
#> 46 46 32 h_blonde e_brown Bronx
#> 47 47 68 h_white e_black Manhattan
#> 48 48 51 h_white e_black Queens
#> 49 49 14 h_red e_green Queens
#> 50 50 72 h_white e_brown Brooklyn
#> 51 51 7 h_red e_blue Brooklyn
#> 52 52 22 h_red e_brown Bronx
#> 53 53 52 h_red e_brown Brooklyn
#> 54 54 62 h_brown e_green Bronx
#> 55 55 41 h_black e_brown Queens
#> 56 56 32 h_black e_black Manhattan
#> 57 57 58 h_brown e_brown Queens
#> 58 58 25 h_black e_brown Queens
#> 59 59 70 h_blonde e_green Brooklyn
#> 60 60 64 h_brown e_blue Queens
#> 61 61 25 h_white e_blue Bronx
#> 62 62 42 h_black e_black Brooklyn
#> 63 63 56 h_red e_black Brooklyn
#> 64 64 41 h_blonde e_black Brooklyn
#> 65 65 8 h_white e_black Manhattan
#> 66 66 7 h_black e_green Brooklyn
#> 67 67 42 h_white e_brown Queens
#> 68 68 10 h_white e_blue Manhattan
#> 69 69 60 h_brown e_black Bronx
#> 70 70 52 h_blonde e_brown Brooklyn
#> 71 71 39 h_brown e_blue Manhattan
#> 72 72 69 h_brown e_green Queens
#> 73 73 67 h_blonde e_green Manhattan
#> 74 74 46 h_red e_black Brooklyn
#> 75 75 72 h_black e_black Queens
#> 76 76 66 h_red e_blue Queens
#> 77 77 4 h_black e_blue Manhattan
#> 78 78 62 h_black e_green Brooklyn
#> 79 79 10 h_blonde e_blue Bronx
#> 80 80 16 h_blonde e_black Manhattan
#> 81 81 59 h_blonde e_brown Bronx
#> 82 82 63 h_blonde e_blue Manhattan
#> 83 83 54 h_red e_blue Queens
#> 84 84 14 h_brown e_blue Brooklyn
#> 85 85 48 h_black e_green Manhattan
#> 86 86 59 h_blonde e_black Bronx
#> 87 87 73 h_blonde e_black Bronx
#> 88 88 51 h_brown e_brown Bronx
#> 89 89 14 h_white e_black Bronx
#> 90 90 58 h_blonde e_black Queens
#> 91 91 56 h_red e_green Manhattan
#> 92 92 26 h_red e_blue Brooklyn
#> 93 93 59 h_brown e_black Manhattan
#> 94 94 27 h_white e_green Manhattan
#> 95 95 38 h_black e_green Manhattan
#> 96 96 5 h_blonde e_green Bronx
#> 97 97 14 h_black e_blue Queens
#> 98 98 13 h_black e_brown Manhattan
#> 99 99 54 h_white e_blue Brooklyn
#> 100 100 66 h_white e_green Manhattan
#> 101 1 22 h_red e_black Bronx
#> 102 2 57 h_blonde e_black Manhattan
#> 103 3 29 h_white e_green Queens
#> 104 4 14 h_blonde e_black Manhattan
#> 105 5 47 h_red e_green Bronx
#> 106 6 32 h_white e_blue Queens
#> 107 7 49 h_red e_blue Bronx
#> 108 8 8 h_white e_black Brooklyn
#> 109 9 67 h_white e_brown Bronx
#> 110 10 68 h_black e_green Bronx
#> 111 11 15 h_black e_brown Manhattan
#> 112 12 46 h_white e_brown Bronx
#> 113 13 68 h_white e_black Manhattan
#> 114 14 55 h_blonde e_blue Manhattan
#> 115 15 7 h_white e_green Bronx
#> 116 16 10 h_black e_brown Brooklyn
#> 117 17 49 h_red e_blue Manhattan
#> 118 18 12 h_brown e_blue Brooklyn
#> 119 19 41 h_white e_blue Bronx
#> 120 20 10 h_brown e_blue Bronx
#> 121 21 12 h_white e_green Manhattan
#> 122 22 53 h_white e_blue Manhattan
#> 123 23 5 h_black e_black Manhattan
#> 124 24 46 h_brown e_black Queens
#> 125 25 14 h_brown e_black Queens
#> 126 26 55 h_white e_green Brooklyn
#> 127 27 53 h_red e_brown Manhattan
#> 128 28 31 h_black e_brown Manhattan
#> 129 29 31 h_blonde e_brown Queens
#> 130 30 55 h_brown e_black Brooklyn
# Build the symbolic table used below: suspect is the concept and sym.set is
# the default for categorical variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#> age hair eyes region
#> <symblc_n> <symblc_s> <symblc_s> <symblc_s>
#> 1 [22.00 : 42.00] {h_red} {e_brown,e_black} {Bronx}
#> 2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black} {Bronx,Manhattan}
#> 3 [29.00 : 64.00] {h_brown,h_white} {e_brown,e_green} {Brooklyn,Queens}
#> 4 [14.00 : 55.00] {h_blonde} {e_brown,e_black} {Bronx,Manhattan}
#> 5 [4.00 : 47.00] {h_brown,h_red} {e_green} {Manhattan,Bronx}
#> 6 [32.00 : 61.00] {h_blonde,h_white} {e_green,e_blue} {Bronx,Queens}
#> 7 [49.00 : 61.00] {h_white,h_red} {e_black,e_blue} {Queens,Bronx}
#> 8 [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#> 9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown} {Brooklyn,Bronx}
#> 10 [50.00 : 68.00] {h_brown,h_black} {e_brown,e_green} {Manhattan,Bronx}
#> # ℹ 90 more rows
# MCFA on columns 2 and 3 of sym.table, then a scatterplot of the second and
# third columns of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Same analysis with column 4 added.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)
# Work on a copy of oils named `datos` ("data") and display it.
datos <- oils
datos
#> # A tibble: 8 × 4
#> GRA FRE IOD SAP
#> * <symblc_n> <symblc_n> <symblc_n> <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94] [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92] [-6.00 : -1.00] [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93] [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00] [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92] [0.00 : 6.00] [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87] [30.00 : 38.00] [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86] [22.00 : 32.00] [53.00 : 77.00] [190.00 : 202.00]

# Run the symbolic UMAP on the table and print the resulting coordinates
# (columns V1..V4 in the output below).
x <- sym.umap(datos)
x
#> V1 V2 V3 V4
#> 1 0.9860083 0.89991401 4.81728195 -5.6939003
#> 2 0.9409857 0.94503993 4.86241180 -5.7390228
#> 3 1.0867854 0.79916125 4.71653749 -5.5931990
#> 4 1.0020044 0.88405328 4.80142746 -5.6780750
#> 5 0.7703159 1.11545494 5.03283501 -5.9094492
#> 6 0.8456396 1.04124504 4.95861039 -5.8351153
#> 7 1.0881362 0.79794760 4.71532875 -5.5919997
#> 8 0.9933712 0.89259317 4.80998754 -5.6866934
#> 9 -13.5306445 -3.57850533 -4.01434432 -5.8940695
#> 10 -13.3549924 -3.56905434 -3.79301239 -5.8334233
#> 11 -13.4368926 -3.61103936 -3.81192915 -5.9564014
#> 12 -13.3649649 -3.63123114 -3.66378278 -5.9824984
#> 13 -13.4910968 -3.77148384 -4.06414225 -5.8479219
#> 14 -13.4004207 -3.84754944 -4.02658245 -5.8584225
#> 15 -13.4657605 -3.80210620 -3.95877829 -5.6372791
#> 16 -13.4768570 -3.74360475 -4.03796673 -5.6784801
#> 17 -12.6450142 -3.85978722 -3.31464060 -5.3839912
#> 18 -12.7215340 -3.71468386 -3.23811022 -5.4026483
#> 19 -12.7993953 -3.71922070 -3.11148579 -5.3571457
#> 20 -12.6734235 -3.87007425 -3.23548338 -5.3761628
#> 21 -12.6345385 -3.89884907 -3.06121497 -5.1794001
#> 22 -12.5118205 -3.85102283 -3.14736628 -5.4415171
#> 23 -12.4345290 -3.85188296 -3.24578351 -5.5081651
#> 24 -12.4968778 -3.75756660 -3.24193458 -5.6869147
#> 25 -12.9426769 -3.97562104 -3.40031322 -5.7212044
#> 26 -12.9060964 -3.93211667 -3.27701076 -5.7365000
#> 27 -12.6815614 -3.91649130 -3.41905624 -5.6605316
#> 28 -12.9382973 -3.91968801 -3.18034930 -5.8111433
#> 29 -12.6763976 -4.38618647 -3.30824987 -5.9193312
#> 30 -12.6579397 -4.23877232 -3.21085014 -5.7904351
#> 31 -12.8523757 -4.32407726 -3.22199201 -5.7673880
#> 32 -12.7654688 -4.25538759 -3.07373691 -5.7606879
#> 33 -3.6235223 -4.91064336 -1.56115591 3.4708372
#> 34 -3.4458466 -4.92697132 -1.48567832 3.4337825
#> 35 -4.0984301 -5.13253003 -1.54917078 3.7472984
#> 36 -4.0884506 -5.10579365 -1.60016265 3.7181736
#> 37 -3.2161215 -5.07905775 -1.41022255 3.0793206
#> 38 -3.3191084 -4.86694168 -1.21273403 3.0227985
#> 39 -3.6060405 -4.80271950 -1.48193669 3.3153556
#> 40 -3.6180482 -4.72466072 -1.48488825 3.2433159
#> 41 -3.7509792 -4.49423669 0.04142865 4.0096587
#> 42 -3.8267858 -4.70819938 -0.12014209 3.9890622
#> 43 -3.9917985 -4.48472462 0.14972124 4.3878792
#> 44 -4.0204393 -4.57618905 0.06838280 4.3851444
#> 45 -3.5877174 -4.54649754 -0.03229664 3.8584483
#> 46 -3.5841310 -4.44843310 -0.02090905 3.7776288
#> 47 -3.8222620 -4.39510099 0.08653729 4.0638388
#> 48 -3.8998471 -4.58496269 0.29479195 3.9317465
#> 49 -3.4134773 -4.77402586 -1.78519760 3.2474280
#> 50 -2.9898122 -4.91930917 -1.58079954 3.2486680
#> 51 -3.3681871 -4.77936034 -1.88442499 3.2883518
#> 52 -3.1551304 -4.75279608 -1.48413122 3.4629728
#> 53 -3.0980536 -4.89347205 -1.55089180 2.9162582
#> 54 -2.8521995 -5.03580372 -1.38141953 3.1241316
#> 55 -3.0297219 -4.72766636 -1.34721548 2.7238592
#> 56 -2.9560954 -4.75735285 -1.44623139 3.0659934
#> 57 -3.5606057 -4.57721779 -0.74621370 3.5289767
#> 58 -3.2128852 -4.73706666 -0.84359285 3.3265813
#> 59 -3.5852461 -4.62210412 -0.81040517 3.5955839
#> 60 -3.3050724 -4.64622936 -0.76347205 3.3726382
#> 61 -3.5302254 -4.43202011 -0.63467169 3.4073348
#> 62 -3.1359512 -4.59589280 -0.73088477 3.0200295
#> 63 -3.1561241 -4.40671484 -0.64115602 3.3529919
#> 64 -3.0963427 -4.70731269 -0.79491371 3.1298608
#> 65 -6.0692581 21.10502439 0.72412017 1.7842678
#> 66 -6.0058732 21.16506345 0.66039424 1.6709371
#> 67 -5.1091979 21.13642270 2.73228733 3.2046088
#> 68 -5.0106859 20.98888915 2.59465095 3.2392957
#> 69 -5.9308868 21.24378671 0.65728187 1.6317569
#> 70 -6.0804149 21.08925246 0.61063712 1.6951147
#> 71 -5.1789480 21.01138083 2.54281927 3.1515817
#> 72 -5.0009887 20.93423404 2.52147063 3.2618789
#> 73 -5.9825168 21.19587447 0.78987163 1.8354681
#> 74 -6.1841144 20.99168730 0.83012569 1.9914590
#> 75 -5.0567839 21.09178841 2.70227892 3.1444192
#> 76 -4.7653762 20.94306109 2.65085362 3.0047076
#> 77 -5.9704225 21.21327866 0.90309372 1.9487579
#> 78 -6.1471899 21.03108638 0.82449009 1.9739816
#> 79 -5.2991892 20.90164664 2.59621563 2.9627018
#> 80 -5.1496617 21.14265745 2.76901673 3.0602014
#> 81 -4.6704701 -5.42426404 -1.55147387 4.1375588
#> 82 -4.6764571 -5.49607137 -1.64712373 3.8924039
#> 83 -4.7663289 -5.43939696 -1.51724305 4.2935978
#> 84 -4.7784755 -5.33444428 -1.49891787 4.3993260
#> 85 -4.4951014 -5.28026220 -1.56406576 3.9175227
#> 86 -4.4831269 -5.39870583 -1.67989091 3.8621398
#> 87 -4.7547859 -5.44383470 -1.56425820 4.2842275
#> 88 -4.6654754 -5.28960501 -1.63284357 4.3832188
#> 89 -4.3528951 -4.78978486 -0.23925707 4.6692858
#> 90 -4.3727743 -4.79344277 -0.26232808 4.7642112
#> 91 -4.4500515 -4.88103656 -0.27901455 4.8265510
#> 92 -4.6099671 -5.00033650 -0.47551525 4.8092532
#> 93 -4.2087229 -4.63174438 0.01372405 4.6146335
#> 94 -4.0801048 -4.50989319 -0.14585606 4.7520429
#> 95 -4.4556452 -4.88468948 -0.26219581 4.7328890
#> 96 -4.6358591 -5.05352054 -0.44421632 4.8610913
#> 97 18.1800557 -0.18879403 1.82770122 -1.6448660
#> 98 18.2564597 -0.43282885 2.10505901 -1.6435775
#> 99 18.1053040 -0.47528797 1.97895195 -1.4366147
#> 100 18.3693135 -0.58803426 2.18717770 -1.5753198
#> 101 18.0786614 -0.15532924 1.80498812 -1.6658405
#> 102 18.3466583 -0.34072159 1.93305497 -1.7449864
#> 103 18.3347259 -0.44332464 2.11625865 -1.5569134
#> 104 18.5222308 -0.61568107 2.14877132 -1.6698478
#> 105 18.0518297 -0.97387736 1.98859919 -0.9701503
#> 106 18.2783892 -0.95322185 2.18282346 -1.2442289
#> 107 18.3167545 -1.08085302 2.23923808 -1.1708491
#> 108 18.2987067 -1.03040967 2.29937036 -1.2590572
#> 109 18.0488344 -0.86962342 2.13235763 -0.7949307
#> 110 17.9130110 -1.02652666 2.14880877 -1.0448502
#> 111 18.3018344 -0.97605354 2.25622411 -1.1631666
#> 112 18.2298245 -1.10020923 2.36755245 -1.1648666
#> 113 17.6426907 -0.08518581 1.72287227 -1.4596952
#> 114 17.7500805 -0.14675786 1.61272436 -1.3220432
#> 115 17.9985721 -0.06971977 1.99165564 -1.7232374
#> 116 18.1321692 -0.05752845 1.94050999 -1.8010649
#> 117 17.6712943 0.14862500 1.59909406 -1.4340175
#> 118 17.6087791 0.09287174 1.46551245 -1.2583372
#> 119 18.0293186 0.03483318 1.55531670 -1.4842938
#> 120 18.0345691 0.02651472 1.52575862 -1.3740090
#> 121 17.6673560 -0.84172100 1.66844407 -0.7776871
#> 122 17.7818971 -0.73330133 1.76093531 -0.7766115
#> 123 17.7601514 -1.12143759 2.16380579 -0.7238862
#> 124 17.9198974 -1.17282984 2.06535646 -0.8307642
#> 125 17.3745836 -0.70324486 1.71423419 -0.8981729
#> 126 17.4321751 -0.77788941 1.70624801 -0.8376455
#> 127 17.5327901 -1.00887665 1.71744965 -0.8391405
#> 128 17.5198197 -1.06507693 1.80176618 -0.7972513
# Plot `x`, the object assigned earlier in the file (its printed rows end
# just above this line).
plot(x)
# Point `datos` at the Cardiological data and print it; the printout below
# shows 11 rows and the interval-valued columns Pulse, Syst and Diast.
datos <- Cardiological
datos
#> # A tibble: 11 × 3
#> Pulse Syst Diast
#> <symblc_n> <symblc_n> <symblc_n>
#> 1 [44.00 : 68.00] [90.00 : 100.00] [50.00 : 70.00]
#> 2 [60.00 : 72.00] [90.00 : 130.00] [70.00 : 90.00]
#> 3 [56.00 : 90.00] [140.00 : 180.00] [90.00 : 100.00]
#> 4 [70.00 : 112.00] [110.00 : 142.00] [80.00 : 108.00]
#> 5 [54.00 : 72.00] [90.00 : 100.00] [50.00 : 70.00]
#> 6 [70.00 : 100.00] [130.00 : 160.00] [80.00 : 110.00]
#> 7 [63.00 : 75.00] [60.00 : 100.00] [140.00 : 150.00]
#> 8 [72.00 : 100.00] [130.00 : 160.00] [76.00 : 90.00]
#> 9 [76.00 : 98.00] [110.00 : 190.00] [70.00 : 110.00]
#> 10 [86.00 : 96.00] [138.00 : 180.00] [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00] [78.00 : 100.00]
# Run sym.umap() on the interval table held in `datos` and print the result;
# the printout below has the numeric columns V1, V2 and V3.
x <- sym.umap(datos)
x
#> V1 V2 V3
#> 1 1.03789350 3.19928497 3.25531753
#> 2 0.76196281 3.11371305 2.56304581
#> 3 0.91967789 3.40111736 3.05698543
#> 4 0.52409080 2.93766812 2.47190811
#> 5 1.21476827 3.24069895 3.29515059
#> 6 1.16066986 2.50561435 2.17703458
#> 7 1.19555493 2.88711927 3.03962004
#> 8 0.85857254 2.40791418 1.88597911
#> 9 1.15320471 2.59280711 2.72421569
#> 10 1.03406138 2.27471281 1.94488591
#> 11 0.98668478 0.90700037 0.55198857
#> 12 0.96306694 0.79738598 0.34614196
#> 13 0.98121541 2.10620519 2.43585564
#> 14 0.92105523 1.85971647 2.14090582
#> 15 0.57673857 0.26064599 0.48739748
#> 16 0.80824129 0.23916550 0.64666741
#> 17 0.69104503 0.27644262 0.20372260
#> 18 -0.90294276 -1.44428251 -2.05582799
#> 19 0.58371059 0.29635288 -0.38599627
#> 20 0.09689016 -0.54731753 -3.09735026
#> 21 0.63679943 -0.06519420 0.20043008
#> 22 -0.68998608 -1.58540226 -1.58291803
#> 23 0.66651302 0.02967599 -0.44094682
#> 24 0.30868309 -0.47987173 -3.14714893
#> 25 0.91132433 1.65436189 1.54114879
#> 26 -1.76404847 -1.62834844 -2.01486981
#> 27 0.96551757 0.67342734 0.08133585
#> 28 -1.22338535 -1.62401153 -2.63768288
#> 29 0.06788396 -0.46771089 0.85358019
#> 30 -1.27791385 -2.24997002 -1.29143344
#> 31 0.29255152 -0.38481663 0.39500773
#> 32 -0.81745339 -2.26577850 -1.63930986
#> 33 0.57622613 3.25130450 3.02339195
#> 34 0.74321152 2.97878763 2.22917119
#> 35 0.68360148 3.44620512 2.83555857
#> 36 0.59501225 2.97522767 2.36737508
#> 37 1.08789593 2.81728202 2.91933108
#> 38 1.18702660 2.10936189 2.04308015
#> 39 1.05200636 2.84289547 3.03562138
#> 40 1.10261562 2.06962060 1.76013648
#> 41 0.86491315 0.62464226 0.49237410
#> 42 -1.29421540 -1.78701684 -2.33980927
#> 43 0.83166457 0.54362850 -0.42389428
#> 44 -0.60022361 -1.41767443 -2.94749824
#> 45 0.13337842 -0.45969351 0.73256151
#> 46 -0.86156754 -2.19124374 -1.32099952
#> 47 0.38461319 -0.41230284 -0.26516427
#> 48 -0.43923078 -1.98201911 -2.13931468
#> 49 -0.61858048 -1.25414964 3.03523895
#> 50 -0.75666361 -1.33621524 3.26765037
#> 51 -0.92215026 -1.14892943 3.06926801
#> 52 -0.94588102 -1.49162072 3.20112832
#> 53 -0.50074003 -1.18349889 2.95945186
#> 54 -0.74471746 -1.35102203 3.13822484
#> 55 -0.63098798 -1.18449520 3.35776962
#> 56 -1.03111490 -1.59221016 3.44561137
#> 57 0.88568216 0.75410850 0.74176938
#> 58 -1.41884825 -1.59010667 -2.44002389
#> 59 0.76798854 0.50310714 -0.60646159
#> 60 -0.87289783 -1.37530850 -3.09921793
#> 61 0.54442085 0.09698613 0.72525230
#> 62 -1.41270445 -1.98139202 -1.96607202
#> 63 0.72457122 0.57531649 -0.76655663
#> 64 -0.51748463 -1.53364633 -2.79895998
#> 65 1.00663051 1.80204568 1.36099200
#> 66 -1.75460973 -1.33791027 -2.20964586
#> 67 0.62864861 0.22967051 -1.23746753
#> 68 -0.31845352 -0.81888850 -3.23108584
#> 69 -0.17649903 -0.70899899 0.82427354
#> 70 -1.34842055 -2.11723993 -1.27895316
#> 71 0.41924577 -0.36488031 -2.79520616
#> 72 0.42010906 -0.70632638 -3.00861366
#> 73 -0.78307816 -1.20243843 -1.72524922
#> 74 -1.06502780 -1.91019428 -2.20132249
#> 75 0.03588586 -0.43433330 -2.95675550
#> 76 -0.13060864 -0.68121283 -3.21714492
#> 77 -0.57250633 -1.53528938 -1.36311723
#> 78 -0.71830405 -2.02429060 -1.58953581
#> 79 0.59266350 -0.48520823 -3.07947965
#> 80 0.67538472 -0.55236300 -3.05782806
#> 81 -1.69119009 -1.06391176 -1.81100092
#> 82 -1.83237426 -1.49713066 -2.05861929
#> 83 -0.61060328 -1.09668559 -2.03044672
#> 84 -1.00508690 -1.61286857 -2.86203788
#> 85 -1.24425556 -1.74253344 -0.82948353
#> 86 -1.60385587 -1.93033915 -1.20005870
#> 87 -0.38393695 -1.31600720 -1.68711318
#> 88 -0.77922480 -2.12692014 -2.02593505
# Plot the sym.umap() result stored in `x`.
plot(x)
# Load the `oils` data, point `datos` at it and print interval.length() for
# it; the printed table below has one row per object (L, P, Co, ...) and the
# columns GRA, FRE, IOD and SAP.
data(oils)
datos <- oils
interval.length(datos)
#> GRA FRE IOD SAP
#> L 0.005 9 34 78
#> P 0.007 1 16 9
#> Co 0.002 5 14 9
#> S 0.006 2 12 6
#> Ca 0.001 10 2 4
#> O 0.005 6 11 9
#> B 0.010 8 8 9
#> H 0.006 10 24 12
# Load the hardwoodBrito data and keep a working copy, together with its
# row names and column names, for the histogram PCA steps that follow.
data("hardwoodBrito")
Hardwood.histogram <- hardwoodBrito
Hardwood.names <- row.names(Hardwood.histogram)
Hardwood.cols <- colnames(Hardwood.histogram)
# Print the table; every column is histogram-valued (see the output below).
Hardwood.histogram
#> # A tibble: 5 × 4
#> ANNT JULT ANNP MITM
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1 <hist> <hist> <hist> <hist>
#> 2 <hist> <hist> <hist> <hist>
#> 3 <hist> <hist> <hist> <hist>
#> 4 <hist> <hist> <hist> <hist>
#> 5 <hist> <hist> <hist> <hist>
# Inspect the first element of the first column: as printed below, it is a
# list with `$breaks` (bin boundaries) and `$props` (one value per bin).
Hardwood.histogram[[1]][[1]]
#> $breaks
#> [1] -3.9 4.2 10.3 20.6
#>
#> $props
#> [1] 0.5 0.4 0.1
# Result of weighted.center.Hist.RSDA() on the histogram table.
weighted.center <- weighted.center.Hist.RSDA(Hardwood.histogram)
# Matrix filled with the constant 3, with one row per row name and one column
# per variable of the histogram table; it is the second argument of
# sym.histogram.pca().
BIN.Matrix <- matrix(
  3,
  nrow = length(Hardwood.names),
  ncol = length(Hardwood.cols)
)
pca.hist <- sym.histogram.pca(Hardwood.histogram, BIN.Matrix)
#> Warning: Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
# Print the `classic.PCA` component of the histogram PCA result; the summary
# below lists the objects it makes available ($eig, $var, $ind, ...).
pca.hist$classic.PCA
#> **Results for the Principal Component Analysis (PCA)**
#> The analysis was performed on 85 individuals, described by 4 variables
#> *The results are available in the following objects:
#>
#> name description
#> 1 "$eig" "eigenvalues"
#> 2 "$var" "results for the variables"
#> 3 "$var$coord" "coord. for the variables"
#> 4 "$var$cor" "correlations variables - dimensions"
#> 5 "$var$cos2" "cos2 for the variables"
#> 6 "$var$contrib" "contributions of the variables"
#> 7 "$ind" "results for the individuals"
#> 8 "$ind$coord" "coord. for the individuals"
#> 9 "$ind$cos2" "cos2 for the individuals"
#> 10 "$ind$contrib" "contributions of the individuals"
#> 11 "$ind.sup" "results for the supplementary individuals"
#> 12 "$ind.sup$coord" "coord. for the supplementary individuals"
#> 13 "$ind.sup$cos2" "cos2 for the supplementary individuals"
#> 14 "$call" "summary statistics"
#> 15 "$call$centre" "mean of the variables"
#> 16 "$call$ecart.type" "standard error of the variables"
#> 17 "$call$row.w" "weights for the individuals"
#> 18 "$call$col.w" "weights for the variables"
# Print the `sym.hist.matrix.PCA` component; as shown below it is a table of
# histogram-valued columns PC.1 to PC.4.
pca.hist$sym.hist.matrix.PCA
#> # A tibble: 5 × 4
#> PC.1 PC.2 PC.3 PC.4
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1 <hist> <hist> <hist> <hist>
#> 2 <hist> <hist> <hist> <hist>
#> 3 <hist> <hist> <hist> <hist>
#> 4 <hist> <hist> <hist> <hist>
#> 5 <hist> <hist> <hist> <hist>
# Plot built from the PCA bins for the single concept "ACER" on axis 1.
ACER.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = "ACER",
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)
ACER.p1
# Same plot for every distinct concept found in the bins table.
ALL.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = unique(pca.hist$Bins.df$Object.Name),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)
ALL.p1
#> Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
# Apply quantiles.RSDA() to the PCA histogram matrix with second argument 3;
# the call emits the warnings recorded below.
Hardwood.quantiles.PCA <- quantiles.RSDA(pca.hist$sym.hist.matrix.PCA, 3)
#> Warning in min(which(props.cum >= percentils.RSDA[i])): no non-missing
#> arguments to min; returning Inf
#> Warning: Setting row names on a tibble is deprecated.
# Selections and labels for the arrow plot of the "ACER" concept on the
# first two principal components.
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
label.name <- "Hard Wood"
quantile.ACER.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
quantile.ACER.plot
# Same arrow plot, this time for every row name of the quantile table.
concept.names <- row.names(Hardwood.quantiles.PCA)
var.names <- c("PC.1", "PC.2")
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
label.name <- "Hard Wood"
quantile.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
quantile.plot
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_segment()`).
# Shorter axis labels are used for the remaining quantile plots.
var.names <- c("PC.1", "PC.2")
Title <- "First Principal Plane"
axes.x.label <- "PC 1 (84.83%)"
axes.y.label <- "PC 2 (9.70%)"
label.name <- "Hard Wood"
# First for the "ACER" concept only ...
concept.names <- "ACER"
plot.3D.HW <- sym.quantiles.PCA.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
plot.3D.HW
# ... then for every row name of the quantile table.
concept.names <- row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
#> Warning: Ignoring 4 observations
# Call sym.all.quantiles.mesh3D.plot() with the same arguments as the
# previous plot (all concepts, PC.1 and PC.2, same title and labels).
sym.all.quantiles.mesh3D.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
Hardwood.quantiles.PCA.2 <- quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA, 100)
#> Warning: Setting row names on a tibble is deprecated.
# Take the five histograms stored in the first column of the result.
h <- Hardwood.quantiles.PCA.2[[1]][[1]]
h2 <- Hardwood.quantiles.PCA.2[[1]][[2]]
h3 <- Hardwood.quantiles.PCA.2[[1]][[3]]
h4 <- Hardwood.quantiles.PCA.2[[1]][[4]]
h5 <- Hardwood.quantiles.PCA.2[[1]][[5]]
# Common evaluation grid: the distinct break points of all five histograms,
# in ascending order.
breaks.unique <- unique(
  c(h$breaks, h2$breaks, h3$breaks, h4$breaks, h5$breaks)
)
tmp.unique <- breaks.unique[order(breaks.unique)]
# HistRSDAToEcdf() returns a function; evaluate each one on the common grid so
# that tmp, tmp2, ..., tmp5 hold values at the same points.
tmp <- HistRSDAToEcdf(h)(v = tmp.unique)
tmp2 <- HistRSDAToEcdf(h2)(v = tmp.unique)
tmp3 <- HistRSDAToEcdf(h3)(v = tmp.unique)
tmp4 <- HistRSDAToEcdf(h4)(v = tmp.unique)
tmp5 <- HistRSDAToEcdf(h5)(v = tmp.unique)
# The Kolmogorov–Smirnov distance is the maximum of the absolute differences,
# computed here between the first and the second histogram.
abs_dif <- abs(tmp2 - tmp)
distancia_ks <- max(abs_dif)
distancia_ks
#> [1] 0.05857869
library(tidyr)
# Combine the evaluation grid and the five ECDF vectors into one data frame,
# then reshape to long format: one row per (grid point, hardwood) pair, with
# the hardwood name in `HardWood` and its value in `ecdf`.
df.HW <- data.frame(
  PC.1 = tmp.unique,
  ACER = tmp,
  ALNUS = tmp2,
  FRAXINUS = tmp3,
  JUGLANS = tmp4,
  QUERCUS = tmp5
) %>%
  pivot_longer(
    cols = c(ACER, ALNUS, FRAXINUS, JUGLANS, QUERCUS),
    names_to = "HardWood",
    values_to = "ecdf"
  )
# One line per hardwood over the first principal component.
grafico_ecdf <- ggplot(
  data = df.HW,
  aes(x = PC.1, y = ecdf, color = HardWood)
) +
  # Line thickness is set with `linewidth`; `size` is deprecated for line
  # geoms since ggplot2 3.4.0. A single layer is enough: the former second,
  # default-width geom_line() only redrew the same lines.
  geom_line(linewidth = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 12)
  )
grafico_ecdf